{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import steward as st\n",
    "import matplotlib.pyplot as plt\n",
    "import pickle\n",
    "import xgboost\n",
    "from sklearn.metrics import roc_auc_score\n",
    "%matplotlib inline\n",
    "from src import build\n",
    "from src import train\n",
    "from src.feature_cols import to_drop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Run the project's build entry point (build_all from src.build) before any\n",
    "# data is loaded.\n",
    "# NOTE(review): presumably this materialises the feature sets that the later\n",
    "# cells fetch through st.get_instance(...).load() -- confirm in src/build.py.\n",
    "build.build_all()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Names of the feature sets to load. Entries are switched on and off by\n",
    "# commenting them out. Order is significant: the loading cell builds its list\n",
    "# of frames in exactly this order and concatenates them column-wise.\n",
    "feature_list = [\n",
    "#     'basic_preprocess/cont_r_train_30W',\n",
    "    'basic_preprocess/cont_train_30W',\n",
    "    'basic_preprocess/conc_train_30W',\n",
    "    \n",
    "#     'index/index_train_30W'\n",
    "    \n",
    "    'feature/rank1_train_30W',\n",
    "    'feature/rank2_train_30W',\n",
    "    'feature/rank3_train_30W',\n",
    "    \n",
    "    'feature/history1_train_30W',\n",
    "    'feature/history2_train_30W',\n",
    "    'feature/history3_train_30W',\n",
    "]\n",
    "\n",
    "# Name of the target set; it is loaded through the same st.get_instance(...).load()\n",
    "# call as the feature sets.\n",
    "y = 'basic_preprocess/y_train_30W'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Load every feature set named in feature_list, keeping list order.\n",
    "__x = []\n",
    "for name in feature_list:\n",
    "    __x.append(st.get_instance(name).load())\n",
    "\n",
    "# Target values, loaded the same way.\n",
    "y_df = st.get_instance(y).load()\n",
    "\n",
    "# One feature set has its row index overwritten with the index of the first\n",
    "# loaded frame before the column-wise concat. The frame is looked up by name\n",
    "# instead of by the old hard-coded position 3, so that toggling entries in\n",
    "# feature_list cannot silently re-point the realignment at a different frame.\n",
    "# With the current feature_list the name resolves to position 3, i.e. the\n",
    "# behaviour is unchanged. If the feature set is not loaded, nothing is realigned.\n",
    "# NOTE(review): presumably rank2 is stored with a different index than the\n",
    "# other sets -- confirm, and ideally fix it where the feature is built.\n",
    "REINDEXED_FEATURE = 'feature/rank2_train_30W'\n",
    "if REINDEXED_FEATURE in feature_list:\n",
    "    __x[feature_list.index(REINDEXED_FEATURE)].index = __x[0].index\n",
    "\n",
    "# Join all feature frames side by side into the training matrix.\n",
    "X_df = pd.concat(__x, copy=False, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING:root:df previous cols: 687 origin to_drop cols: 161 will drop cols: 159 now df cols: 528\n"
     ]
    }
   ],
   "source": [
    "# Candidate drop-list of 130 column names. It is not referenced again in this\n",
    "# cell: only pearson97 is passed to drop_if_possible at the bottom.\n",
    "# NOTE(review): the name suggests columns flagged at a 0.99 Pearson-correlation\n",
    "# threshold -- confirm how the list was generated.\n",
    "pearson99=['price_last_cont9',\n",
    " '1month_cont3',\n",
    " 'max_rank_order_basicroom_rank',\n",
    " 'history_area_cont3',\n",
    " 'price_last_cont7',\n",
    " 'ordinal_rank_order_basicroom_rank',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_norm',\n",
    " 'history_cont6',\n",
    " 'average_rank_orderby_order_room_rankby_room_roomtag_2_reverse_norm',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio',\n",
    " '1month_cont8',\n",
    " '3month_cont4',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_3_reverse_norm',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank_reverse',\n",
    " 'average_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_reverse_norm',\n",
    " '1week_cont5',\n",
    " 'roomtag_6_lastord_0.0',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_realratio_norm',\n",
    " 'average_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " '3month_cont3',\n",
    " '1month_cont6',\n",
    " 'average_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm',\n",
    " 'roomtag_6_lastord_nan',\n",
    " 'history_area_cont2',\n",
    " 'basic_minprice_lastord',\n",
    " '1week_cont7',\n",
    " 'price_last_cont13',\n",
    " 'roomservice_3_lastord_nan',\n",
    " '1month_cont2',\n",
    " 'user_avgprice',\n",
    " 'price_last_cont11',\n",
    " 'roomtag_4_1.0',\n",
    " 'history_cont2',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " 'max_rank_order_basicroom_price_deduct',\n",
    " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
    " 'min_rank_order_basicroom_price_deduct',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_minarea_reverse_norm',\n",
    " 'user_roomservice_7_0ratio_1week',\n",
    " 'max_rank_orderid_basicroomrank_basic_minarea_norm',\n",
    " 'average_rank_orderid_basicroomrank_basic_maxarea_reverse_norm',\n",
    " 'star_lastord_nan',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_ordnumratio_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_realratio_norm',\n",
    " 'average_rank_order_basicroom_rank',\n",
    " '1week_cont2',\n",
    " 'average_rank_orderby_order_room_rankby_room_roomtag_3_reverse_norm',\n",
    " 'price_last_cont8',\n",
    " 'price_last_cont19',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_reverse_norm',\n",
    " 'price_last_cont10',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_norm',\n",
    " '1week_cont3',\n",
    " 'average_rank_orderid_basicroomrank_basic_minarea_reverse_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea_norm',\n",
    " '1week_cont8',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_2_reverse_norm',\n",
    " '3month_cont8',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio_norm',\n",
    " 'ordinal_rank_order_basicroom_price_deduct',\n",
    " '1week_cont6',\n",
    " 'roomservice_5_lastord_nan',\n",
    " 'dense_rank_order_basicroom_rank',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_3_norm',\n",
    " 'roomservice_8_lastord_nan',\n",
    " 'average_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_reverse_norm',\n",
    " 'basic_maxarea',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio',\n",
    " '3month_cont6',\n",
    " 'price_last_cont20',\n",
    " 'user_medprice_1week',\n",
    " 'roomtag_6_islast',\n",
    " 'average_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
    " 'price_last_cont16',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea_reverse_norm',\n",
    " '3month_cont7',\n",
    " 'roomtag_5_lastord_nan',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank',\n",
    " 'price_last_cont3',\n",
    " 'user_activation',\n",
    " 'price_last_cont17',\n",
    " 'price_last_cont12',\n",
    " 'dense_rank_orderby_order_room_rankby_room_rank',\n",
    " 'user_roomservice_7_0ratio_1month',\n",
    " '1month_cont7',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank_norm',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
    " 'average_rank_orderid_basicroomrank_basic_30days_realratio_reverse_norm',\n",
    " '1month_cont4',\n",
    " 'roomtag_5_1.0',\n",
    " 'roomtag_1_1.0',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'roomservice_7_1.0',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_reverse',\n",
    " 'max_rank_orderby_order_room_rankby_room_returnvalue_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_reverse',\n",
    " 'user_roomservice_7_0ratio_3month',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'roomservice_5_1.0',\n",
    " 'price_last_cont21',\n",
    " 'average_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_norm',\n",
    " 'roomtag_4_lastord_nan',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_norm',\n",
    " 'price_last_cont18',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_2_norm',\n",
    " 'history_cont5',\n",
    " 'max_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_reverse_norm',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_reverse',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
    " '1week_cont4',\n",
    " 'min_rank_order_basicroom_rank',\n",
    " 'max_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_norm',\n",
    " '3month_cont2',\n",
    " 'roomservice_2_1.0',\n",
    " 'history_cont4',\n",
    " 'price_last_cont15']\n",
    "\n",
    "# Candidate drop-list of 142 column names. It is not referenced again in this\n",
    "# cell: only pearson97 is passed to drop_if_possible at the bottom.\n",
    "# NOTE(review): the name suggests columns flagged at a 0.98 Pearson-correlation\n",
    "# threshold -- confirm how the list was generated.\n",
    "pearson98 = ['price_last_cont9',\n",
    " '1month_cont3',\n",
    " 'max_rank_order_basicroom_rank',\n",
    " 'history_area_cont3',\n",
    " 'price_last_cont7',\n",
    " 'ordinal_rank_order_basicroom_rank',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_norm',\n",
    " 'history_cont6',\n",
    " 'average_rank_orderby_order_room_rankby_room_roomtag_2_reverse_norm',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio',\n",
    " 'roomservice_4_lastord_nan',\n",
    " '1month_cont8',\n",
    " '3month_cont4',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_3_reverse_norm',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank_reverse',\n",
    " 'dense_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " 'average_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_reverse_norm',\n",
    " '1week_cont5',\n",
    " 'roomtag_6_lastord_0.0',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea_reverse',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_realratio_norm',\n",
    " 'average_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " '3month_cont3',\n",
    " '1month_cont6',\n",
    " 'average_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm',\n",
    " 'roomtag_6_lastord_nan',\n",
    " 'history_area_cont2',\n",
    " 'basic_minprice_lastord',\n",
    " '1week_cont7',\n",
    " 'price_last_cont13',\n",
    " 'roomservice_3_lastord_nan',\n",
    " '1month_cont2',\n",
    " 'history_cont1',\n",
    " 'user_avgprice',\n",
    " 'price_last_cont11',\n",
    " 'roomtag_4_1.0',\n",
    " 'history_cont2',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " 'max_rank_order_basicroom_price_deduct',\n",
    " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
    " 'min_rank_order_basicroom_price_deduct',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_minarea_reverse_norm',\n",
    " 'user_roomservice_7_0ratio_1week',\n",
    " 'max_rank_orderid_basicroomrank_basic_minarea_norm',\n",
    " 'average_rank_orderid_basicroomrank_basic_maxarea_reverse_norm',\n",
    " 'star_lastord_nan',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_ordnumratio_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_realratio_norm',\n",
    " 'average_rank_order_basicroom_rank',\n",
    " '1week_cont2',\n",
    " 'average_rank_orderby_order_room_rankby_room_roomtag_3_reverse_norm',\n",
    " 'roomservice_6_lastord_nan',\n",
    " 'price_last_cont8',\n",
    " 'price_last_cont19',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_reverse_norm',\n",
    " 'price_last_cont10',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_norm',\n",
    " '1week_cont3',\n",
    " 'average_rank_orderid_basicroomrank_basic_minarea_reverse_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea_norm',\n",
    " '1week_cont8',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_2_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_returnvalue_reverse',\n",
    " '3month_cont8',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio_norm',\n",
    " 'ordinal_rank_order_basicroom_price_deduct',\n",
    " '1week_cont6',\n",
    " 'roomservice_5_lastord_nan',\n",
    " 'dense_rank_order_basicroom_rank',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_3_norm',\n",
    " 'roomservice_8_lastord_nan',\n",
    " 'average_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_reverse_norm',\n",
    " 'basic_maxarea',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct',\n",
    " 'user_medprice_1month',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio',\n",
    " '3month_cont6',\n",
    " 'price_last_cont20',\n",
    " 'user_medprice_1week',\n",
    " 'roomtag_6_islast',\n",
    " 'average_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
    " 'price_last_cont16',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea_reverse_norm',\n",
    " '3month_cont7',\n",
    " 'roomtag_5_lastord_nan',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank',\n",
    " 'price_last_cont3',\n",
    " 'user_activation',\n",
    " 'price_last_cont17',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " 'price_last_cont12',\n",
    " 'dense_rank_orderby_order_room_rankby_room_rank',\n",
    " 'price_last_cont14',\n",
    " 'user_roomservice_7_0ratio_1month',\n",
    " '1month_cont7',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank_norm',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
    " 'average_rank_orderid_basicroomrank_basic_30days_realratio_reverse_norm',\n",
    " '1month_cont4',\n",
    " 'roomtag_5_1.0',\n",
    " 'roomtag_1_1.0',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'roomservice_7_1.0',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_reverse',\n",
    " 'max_rank_orderby_order_room_rankby_room_returnvalue_norm',\n",
    " 'max_rank_order_basicroom_roomtag_2',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_reverse',\n",
    " 'user_roomservice_7_0ratio_3month',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'roomservice_5_1.0',\n",
    " 'price_last_cont21',\n",
    " 'average_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_norm',\n",
    " 'roomtag_4_lastord_nan',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_norm',\n",
    " 'price_last_cont18',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_2_norm',\n",
    " 'history_cont5',\n",
    " 'max_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_reverse_norm',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_reverse',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
    " '1week_cont4',\n",
    " 'min_rank_order_basicroom_rank',\n",
    " '1month_cont5',\n",
    " 'max_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_norm',\n",
    " '3month_cont2',\n",
    " 'roomservice_2_1.0',\n",
    " 'history_cont4',\n",
    " 'price_last_cont15']\n",
    "\n",
    "# Drop-list of 161 column names; this is the list handed to\n",
    "# st.tools.drop_if_possible at the bottom of this cell.\n",
    "# NOTE(review): the name suggests columns flagged at a 0.97 Pearson-correlation\n",
    "# threshold -- confirm how the list was generated.\n",
    "pearson97 =['history_area_cont1',\n",
    " 'price_last_cont9',\n",
    " 'dense_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
    " '1month_cont3',\n",
    " 'max_rank_order_basicroom_rank',\n",
    " 'history_area_cont3',\n",
    " 'price_last_cont7',\n",
    " 'ordinal_rank_order_basicroom_rank',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_norm',\n",
    " 'history_cont6',\n",
    " 'average_rank_orderby_order_room_rankby_room_roomtag_2_reverse_norm',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio',\n",
    " 'roomservice_4_lastord_nan',\n",
    " '1month_cont8',\n",
    " '3month_cont4',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_3_reverse_norm',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank_reverse',\n",
    " 'dense_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse',\n",
    " 'average_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_reverse_norm',\n",
    " '1week_cont5',\n",
    " 'roomtag_6_lastord_0.0',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea_reverse',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_realratio_norm',\n",
    " 'average_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " '3month_cont3',\n",
    " 'average_rank_orderid_basicroomrank_basic_maxarea',\n",
    " '1month_cont6',\n",
    " 'average_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm',\n",
    " 'roomtag_6_lastord_nan',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea',\n",
    " 'history_area_cont2',\n",
    " 'basic_minprice_lastord',\n",
    " '1week_cont7',\n",
    " 'price_last_cont13',\n",
    " 'roomservice_3_lastord_nan',\n",
    " '1month_cont2',\n",
    " 'history_cont1',\n",
    " 'user_avgprice',\n",
    " 'price_last_cont11',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_realratio',\n",
    " 'roomtag_4_1.0',\n",
    " 'history_cont2',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " 'max_rank_order_basicroom_price_deduct',\n",
    " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
    " 'min_rank_order_basicroom_price_deduct',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_2',\n",
    " 'max_rank_orderid_basicroomrank_basic_minarea_reverse_norm',\n",
    " 'user_roomservice_7_0ratio_1week',\n",
    " 'max_rank_orderid_basicroomrank_basic_minarea_norm',\n",
    " 'average_rank_orderid_basicroomrank_basic_maxarea_reverse_norm',\n",
    " 'min_rank_orderid_basicroomrank_basic_week_ordernum_ratio',\n",
    " 'star_lastord_nan',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_ordnumratio_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_realratio_norm',\n",
    " 'average_rank_order_basicroom_rank',\n",
    " '1week_cont2',\n",
    " 'average_rank_orderby_order_room_rankby_room_roomtag_3_reverse_norm',\n",
    " 'roomservice_6_lastord_nan',\n",
    " 'price_last_cont8',\n",
    " 'price_last_cont19',\n",
    " 'roomtag_5_islast',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_reverse_norm',\n",
    " 'dense_rank_orderid_basicroomrank_basic_maxarea_reverse',\n",
    " 'price_last_cont10',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_norm',\n",
    " '1week_cont3',\n",
    " 'average_rank_orderid_basicroomrank_basic_minarea_reverse_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_realratio_reverse_norm',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea_norm',\n",
    " '1week_cont8',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_2_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_returnvalue_reverse',\n",
    " '3month_cont8',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio_norm',\n",
    " 'ordinal_rank_order_basicroom_price_deduct',\n",
    " '1week_cont6',\n",
    " 'roomservice_5_lastord_nan',\n",
    " 'dense_rank_order_basicroom_rank',\n",
    " 'min_rank_orderid_basicroomrank_basic_30days_realratio_reverse',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_3_norm',\n",
    " 'roomservice_8_lastord_nan',\n",
    " 'average_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_reverse_norm',\n",
    " 'basic_maxarea',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct',\n",
    " 'user_medprice_1month',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio',\n",
    " '3month_cont6',\n",
    " 'price_last_cont20',\n",
    " 'user_medprice_1week',\n",
    " 'roomtag_6_islast',\n",
    " 'average_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
    " 'min_rank_orderid_basicroomrank_basic_week_ordernum_ratio_norm',\n",
    " 'price_last_cont16',\n",
    " 'dense_rank_order_basicroom_price_deduct',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
    " '3month_cont5',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
    " 'max_rank_orderid_basicroomrank_basic_maxarea_reverse_norm',\n",
    " '3month_cont7',\n",
    " 'roomtag_5_lastord_nan',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank',\n",
    " 'price_last_cont3',\n",
    " 'user_activation',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_2_reverse',\n",
    " 'price_last_cont17',\n",
    " 'dense_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
    " 'price_last_cont12',\n",
    " 'dense_rank_orderby_order_room_rankby_room_rank',\n",
    " 'price_last_cont14',\n",
    " 'average_rank_orderid_basicroomrank_basic_maxarea_reverse',\n",
    " 'user_roomservice_7_0ratio_1month',\n",
    " '1month_cont7',\n",
    " 'min_rank_orderby_order_room_rankby_room_rank_norm',\n",
    " 'min_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
    " 'average_rank_orderid_basicroomrank_basic_30days_realratio_reverse_norm',\n",
    " '1month_cont4',\n",
    " 'roomtag_5_1.0',\n",
    " 'roomtag_1_1.0',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'roomservice_7_1.0',\n",
    " 'max_rank_orderid_basicroomrank_basic_comment_ratio_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_reverse',\n",
    " 'max_rank_orderby_order_room_rankby_room_returnvalue_norm',\n",
    " 'max_rank_order_basicroom_roomtag_2',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_reverse',\n",
    " 'user_roomservice_7_0ratio_3month',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_norm',\n",
    " 'average_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
    " 'roomservice_5_1.0',\n",
    " 'price_last_cont21',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_realratio_reverse',\n",
    " 'average_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank',\n",
    " 'max_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm',\n",
    " 'max_rank_orderby_order_room_rankby_room_price_deduct_norm',\n",
    " 'roomtag_4_lastord_nan',\n",
    " 'max_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_norm',\n",
    " 'price_last_cont18',\n",
    " 'max_rank_orderby_order_room_rankby_room_roomtag_2_norm',\n",
    " 'history_cont5',\n",
    " 'max_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_reverse_norm',\n",
    " 'price_last_conc40',\n",
    " 'min_rank_orderby_order_room_rankby_room_price_deduct_reverse',\n",
    " 'max_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
    " '1week_cont4',\n",
    " 'min_rank_order_basicroom_rank',\n",
    " '1month_cont5',\n",
    " 'max_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_norm',\n",
    " '3month_cont2',\n",
    " 'roomservice_2_1.0',\n",
    " 'history_cont4',\n",
    " 'price_last_cont15']\n",
    "\n",
    "# Remove the pearson97 columns from X_df. The recorded stderr for this cell\n",
    "# reports 687 columns before, 161 requested, 159 actually dropped, 528 left.\n",
    "# NOTE(review): the return value is not assigned, so this looks like an\n",
    "# in-place mutation of X_df, with names missing from the frame skipped rather\n",
    "# than raising -- confirm against steward's drop_if_possible.\n",
    "st.tools.drop_if_possible(X_df, pearson97)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "start_training...\n",
      "Fold 1/4 Score: 0.469830 \n",
      "start_training...\n",
      "Fold 2/4 Score: 0.465438 \n",
      "start_training...\n",
      "Fold 3/4 Score: 0.466806 \n",
      "start_training...\n",
      "Fold 4/4 Score: 0.480790 \n",
      "Avg Score: 0.470716 \n"
     ]
    }
   ],
   "source": [
    "# Hyper-parameters forwarded to train.train_fold through model_para.\n",
    "model_para = dict(\n",
    "    objective='rank:pairwise',\n",
    "    n_estimators=276,\n",
    "    # scale_pos_weight=5,  (left disabled)\n",
    "    learning_rate=0.3,\n",
    ")\n",
    "\n",
    "# Fold-wise training on the feature matrix and target; save=True is passed\n",
    "# straight through to train_fold.\n",
    "train.train_fold(X_df, y_df, model_para=model_para, save=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,\n",
       "       gamma=0, learning_rate=0.3, max_delta_step=0, max_depth=3,\n",
       "       min_child_weight=1, missing=None, n_estimators=276, nthread=-1,\n",
       "       objective='rank:pairwise', reg_alpha=0, reg_lambda=1,\n",
       "       scale_pos_weight=1, seed=0, silent=True, subsample=1)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hyper-parameters forwarded to train.train_all through model_para\n",
    "# (scale_pos_weight=5 is left disabled).\n",
    "model_para = dict(objective='rank:pairwise', n_estimators=276, learning_rate=0.3)\n",
    "\n",
    "# Kept as the last expression so the notebook displays whatever train_all returns.\n",
    "train.train_all(X_df, y_df, model_para=model_para)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Feed the first 10 columns of X_df (all rows) to st.all_pair_pearsonr; the\n",
    "# object it returns is called immediately and that call's result is kept.\n",
    "# .iloc replaces the original .ix slice: DataFrame.ix was deprecated in pandas\n",
    "# 0.20 and removed in 1.0. On string-labelled columns .ix[:, 0:10] was already\n",
    "# positional, so .iloc[:, 0:10] selects exactly the same columns.\n",
    "result = st.all_pair_pearsonr(X_df.iloc[:, 0:10])()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Bind a second name to the same DataFrame object. This is a plain assignment,\n",
    "# not a copy, so in-place changes made through either name show up in both.\n",
    "df = X_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['history_area_cont1',\n",
       " 'price_last_cont9',\n",
       " 'dense_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
       " '1month_cont3',\n",
       " 'max_rank_order_basicroom_rank',\n",
       " 'history_area_cont3',\n",
       " 'price_last_cont7',\n",
       " 'ordinal_rank_order_basicroom_rank',\n",
       " 'max_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse_norm',\n",
       " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_norm',\n",
       " 'history_cont6',\n",
       " 'average_rank_orderby_order_room_rankby_room_roomtag_2_reverse_norm',\n",
       " 'min_rank_orderid_basicroomrank_basic_comment_ratio',\n",
       " 'roomservice_4_lastord_nan',\n",
       " '1month_cont8',\n",
       " '3month_cont4',\n",
       " 'max_rank_orderby_order_room_rankby_room_roomtag_3_reverse_norm',\n",
       " 'min_rank_orderby_order_room_rankby_room_rank_reverse',\n",
       " 'dense_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
       " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse',\n",
       " 'average_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm',\n",
       " 'average_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_reverse_norm',\n",
       " '1week_cont5',\n",
       " 'roomtag_6_lastord_0.0',\n",
       " 'max_rank_orderid_basicroomrank_basic_maxarea_reverse',\n",
       " 'max_rank_orderid_basicroomrank_basic_30days_realratio_norm',\n",
       " 'average_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
       " '3month_cont3',\n",
       " 'average_rank_orderid_basicroomrank_basic_maxarea',\n",
       " '1month_cont6',\n",
       " 'average_rank_orderby_order_room_rankby_room_room_30days_realratio_reverse_norm',\n",
       " 'max_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm',\n",
       " 'roomtag_6_lastord_nan',\n",
       " 'max_rank_orderid_basicroomrank_basic_maxarea',\n",
       " 'history_area_cont2',\n",
       " 'basic_minprice_lastord',\n",
       " '1week_cont7',\n",
       " 'price_last_cont13',\n",
       " 'roomservice_3_lastord_nan',\n",
       " '1month_cont2',\n",
       " 'history_cont1',\n",
       " 'user_avgprice',\n",
       " 'price_last_cont11',\n",
       " 'max_rank_orderid_basicroomrank_basic_30days_realratio',\n",
       " 'roomtag_4_1.0',\n",
       " 'history_cont2',\n",
       " 'max_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
       " 'max_rank_order_basicroom_price_deduct',\n",
       " 'max_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
       " 'min_rank_order_basicroom_price_deduct',\n",
       " 'min_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
       " 'max_rank_orderby_order_room_rankby_room_roomtag_2',\n",
       " 'max_rank_orderid_basicroomrank_basic_minarea_reverse_norm',\n",
       " 'user_roomservice_7_0ratio_1week',\n",
       " 'max_rank_orderid_basicroomrank_basic_minarea_norm',\n",
       " 'average_rank_orderid_basicroomrank_basic_maxarea_reverse_norm',\n",
       " 'min_rank_orderid_basicroomrank_basic_week_ordernum_ratio',\n",
       " 'star_lastord_nan',\n",
       " 'max_rank_orderid_basicroomrank_basic_30days_ordnumratio_norm',\n",
       " 'max_rank_orderby_order_room_rankby_room_room_30days_realratio_norm',\n",
       " 'average_rank_order_basicroom_rank',\n",
       " '1week_cont2',\n",
       " 'average_rank_orderby_order_room_rankby_room_roomtag_3_reverse_norm',\n",
       " 'roomservice_6_lastord_nan',\n",
       " 'price_last_cont8',\n",
       " 'price_last_cont19',\n",
       " 'roomtag_5_islast',\n",
       " 'max_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_reverse_norm',\n",
       " 'dense_rank_orderid_basicroomrank_basic_maxarea_reverse',\n",
       " 'price_last_cont10',\n",
       " 'min_rank_orderby_order_room_rankby_room_price_deduct_norm',\n",
       " '1week_cont3',\n",
       " 'average_rank_orderid_basicroomrank_basic_minarea_reverse_norm',\n",
       " 'average_rank_orderby_order_room_rankby_room_returnvalue_reverse_norm',\n",
       " 'max_rank_orderid_basicroomrank_basic_30days_realratio_reverse_norm',\n",
       " 'max_rank_orderid_basicroomrank_basic_maxarea_norm',\n",
       " '1week_cont8',\n",
       " 'min_rank_orderby_order_room_rankby_room_price_deduct',\n",
       " 'max_rank_orderby_order_room_rankby_room_roomtag_2_reverse_norm',\n",
       " 'max_rank_orderby_order_room_rankby_room_returnvalue_reverse',\n",
       " '3month_cont8',\n",
       " 'min_rank_orderid_basicroomrank_basic_comment_ratio_norm',\n",
       " 'ordinal_rank_order_basicroom_price_deduct',\n",
       " '1week_cont6',\n",
       " 'roomservice_5_lastord_nan',\n",
       " 'dense_rank_order_basicroom_rank',\n",
       " 'min_rank_orderid_basicroomrank_basic_30days_realratio_reverse',\n",
       " 'max_rank_orderby_order_room_rankby_room_roomtag_3_norm',\n",
       " 'roomservice_8_lastord_nan',\n",
       " 'average_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_reverse_norm',\n",
       " 'basic_maxarea',\n",
       " 'max_rank_orderby_order_room_rankby_room_price_deduct',\n",
       " 'user_medprice_1month',\n",
       " 'max_rank_orderid_basicroomrank_basic_comment_ratio',\n",
       " '3month_cont6',\n",
       " 'price_last_cont20',\n",
       " 'user_medprice_1week',\n",
       " 'roomtag_6_islast',\n",
       " 'average_rank_orderid_basicroomrank_basic_week_ordernum_ratio_reverse_norm',\n",
       " 'min_rank_orderid_basicroomrank_basic_week_ordernum_ratio_norm',\n",
       " 'price_last_cont16',\n",
       " 'dense_rank_order_basicroom_price_deduct',\n",
       " 'min_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
       " '3month_cont5',\n",
       " 'max_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
       " 'max_rank_orderid_basicroomrank_basic_maxarea_reverse_norm',\n",
       " '3month_cont7',\n",
       " 'roomtag_5_lastord_nan',\n",
       " 'min_rank_orderby_order_room_rankby_room_rank',\n",
       " 'price_last_cont3',\n",
       " 'user_activation',\n",
       " 'max_rank_orderby_order_room_rankby_room_roomtag_2_reverse',\n",
       " 'price_last_cont17',\n",
       " 'dense_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
       " 'min_rank_orderid_basicroomrank_basic_comment_ratio_reverse_norm',\n",
       " 'price_last_cont12',\n",
       " 'dense_rank_orderby_order_room_rankby_room_rank',\n",
       " 'price_last_cont14',\n",
       " 'average_rank_orderid_basicroomrank_basic_maxarea_reverse',\n",
       " 'user_roomservice_7_0ratio_1month',\n",
       " '1month_cont7',\n",
       " 'min_rank_orderby_order_room_rankby_room_rank_norm',\n",
       " 'min_rank_orderid_basicroomrank_basic_comment_ratio_reverse',\n",
       " 'average_rank_orderid_basicroomrank_basic_30days_realratio_reverse_norm',\n",
       " '1month_cont4',\n",
       " 'roomtag_5_1.0',\n",
       " 'roomtag_1_1.0',\n",
       " 'max_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
       " 'roomservice_7_1.0',\n",
       " 'max_rank_orderid_basicroomrank_basic_comment_ratio_norm',\n",
       " 'max_rank_orderby_order_room_rankby_room_price_deduct_reverse',\n",
       " 'max_rank_orderby_order_room_rankby_room_returnvalue_norm',\n",
       " 'max_rank_order_basicroom_roomtag_2',\n",
       " 'max_rank_orderby_order_room_rankby_room_rank_reverse',\n",
       " 'user_roomservice_7_0ratio_3month',\n",
       " 'max_rank_orderby_order_room_rankby_room_rank_norm',\n",
       " 'average_rank_orderby_order_room_rankby_room_price_deduct_reverse_norm',\n",
       " 'roomservice_5_1.0',\n",
       " 'price_last_cont21',\n",
       " 'max_rank_orderid_basicroomrank_basic_30days_realratio_reverse',\n",
       " 'average_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
       " 'max_rank_orderby_order_room_rankby_room_rank',\n",
       " 'max_rank_orderid_basicroomrank_basic_30days_ordnumratio_reverse_norm',\n",
       " 'max_rank_orderby_order_room_rankby_room_price_deduct_norm',\n",
       " 'roomtag_4_lastord_nan',\n",
       " 'max_rank_orderby_order_room_rankby_room_room_30days_ordnumratio_norm',\n",
       " 'price_last_cont18',\n",
       " 'max_rank_orderby_order_room_rankby_room_roomtag_2_norm',\n",
       " 'history_cont5',\n",
       " 'max_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_reverse_norm',\n",
       " 'price_last_conc40',\n",
       " 'min_rank_orderby_order_room_rankby_room_price_deduct_reverse',\n",
       " 'max_rank_orderby_order_room_rankby_room_rank_reverse_norm',\n",
       " '1week_cont4',\n",
       " 'min_rank_order_basicroom_rank',\n",
       " '1month_cont5',\n",
       " 'max_rank_orderid_basicroomrank_basic_recent3_ordernum_ratio_norm',\n",
       " '3month_cont2',\n",
       " 'roomservice_2_1.0',\n",
       " 'history_cont4',\n",
       " 'price_last_cont15']"
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Columns reported by steward's Pearson-correlation filter at threshold 0.97\n",
    "# (presumably the highly correlated features to drop; confirm against steward).\n",
    "# NOTE(review): this ran as In[85], yet the only visible assignment of `result`\n",
    "# is in a later cell, so confirm the cell order survives Restart & Run All.\n",
    "st.tools.remove_which_cols_pearsonr(\n",
    "    result,\n",
    "    threshold=0.97,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# train.train_fold(X_df, y_df)\n",
    "# train.train_all(X_df, y_df)\n",
    "# train.train_xgboost_step(X_df, y_df, early_stopping_rounds=50, n_estimators=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0]\tvalidation_0-V:-0.396596\n",
      "Will train until validation_0-V hasn't improved in 50 rounds.\n",
      "[1]\tvalidation_0-V:-0.397628\n",
      "[2]\tvalidation_0-V:-0.402269\n",
      "[3]\tvalidation_0-V:-0.403301\n",
      "[4]\tvalidation_0-V:-0.407942\n",
      "[5]\tvalidation_0-V:-0.411037\n",
      "[6]\tvalidation_0-V:-0.411037\n",
      "[7]\tvalidation_0-V:-0.416194\n",
      "[8]\tvalidation_0-V:-0.4131\n",
      "[9]\tvalidation_0-V:-0.413615\n",
      "[10]\tvalidation_0-V:-0.414131\n",
      "[11]\tvalidation_0-V:-0.416194\n",
      "[12]\tvalidation_0-V:-0.414131\n",
      "[13]\tvalidation_0-V:-0.416194\n",
      "[14]\tvalidation_0-V:-0.416194\n",
      "[15]\tvalidation_0-V:-0.414647\n",
      "[16]\tvalidation_0-V:-0.414647\n",
      "[17]\tvalidation_0-V:-0.415162\n",
      "[18]\tvalidation_0-V:-0.418773\n",
      "[19]\tvalidation_0-V:-0.419804\n",
      "[20]\tvalidation_0-V:-0.418773\n",
      "[21]\tvalidation_0-V:-0.424446\n",
      "[22]\tvalidation_0-V:-0.425477\n",
      "[23]\tvalidation_0-V:-0.427024\n",
      "[24]\tvalidation_0-V:-0.425993\n",
      "[25]\tvalidation_0-V:-0.426509\n",
      "[26]\tvalidation_0-V:-0.42754\n",
      "[27]\tvalidation_0-V:-0.428056\n",
      "[28]\tvalidation_0-V:-0.428056\n",
      "[29]\tvalidation_0-V:-0.428571\n",
      "[30]\tvalidation_0-V:-0.432182\n",
      "[31]\tvalidation_0-V:-0.432182\n",
      "[32]\tvalidation_0-V:-0.435792\n",
      "[33]\tvalidation_0-V:-0.436823\n",
      "[34]\tvalidation_0-V:-0.43837\n",
      "[35]\tvalidation_0-V:-0.43837\n",
      "[36]\tvalidation_0-V:-0.439402\n",
      "[37]\tvalidation_0-V:-0.440949\n",
      "[38]\tvalidation_0-V:-0.440949\n",
      "[39]\tvalidation_0-V:-0.443012\n",
      "[40]\tvalidation_0-V:-0.441465\n",
      "[41]\tvalidation_0-V:-0.441465\n",
      "[42]\tvalidation_0-V:-0.440433\n",
      "[43]\tvalidation_0-V:-0.443528\n",
      "[44]\tvalidation_0-V:-0.444559\n",
      "[45]\tvalidation_0-V:-0.444559\n",
      "[46]\tvalidation_0-V:-0.445075\n",
      "[47]\tvalidation_0-V:-0.445075\n",
      "[48]\tvalidation_0-V:-0.445591\n",
      "[49]\tvalidation_0-V:-0.446622\n",
      "[50]\tvalidation_0-V:-0.449201\n",
      "[51]\tvalidation_0-V:-0.449716\n",
      "[52]\tvalidation_0-V:-0.449716\n",
      "[53]\tvalidation_0-V:-0.448169\n",
      "[54]\tvalidation_0-V:-0.449716\n",
      "[55]\tvalidation_0-V:-0.449716\n",
      "[56]\tvalidation_0-V:-0.449716\n",
      "[57]\tvalidation_0-V:-0.450232\n",
      "[58]\tvalidation_0-V:-0.451779\n",
      "[59]\tvalidation_0-V:-0.452811\n",
      "[60]\tvalidation_0-V:-0.453326\n",
      "[61]\tvalidation_0-V:-0.452811\n",
      "[62]\tvalidation_0-V:-0.454874\n",
      "[63]\tvalidation_0-V:-0.454358\n",
      "[64]\tvalidation_0-V:-0.455389\n",
      "[65]\tvalidation_0-V:-0.454358\n",
      "[66]\tvalidation_0-V:-0.454358\n",
      "[67]\tvalidation_0-V:-0.455389\n",
      "[68]\tvalidation_0-V:-0.454874\n",
      "[69]\tvalidation_0-V:-0.454358\n",
      "[70]\tvalidation_0-V:-0.455905\n",
      "[71]\tvalidation_0-V:-0.456421\n",
      "[72]\tvalidation_0-V:-0.456421\n",
      "[73]\tvalidation_0-V:-0.455389\n",
      "[74]\tvalidation_0-V:-0.455389\n",
      "[75]\tvalidation_0-V:-0.456421\n",
      "[76]\tvalidation_0-V:-0.457452\n",
      "[77]\tvalidation_0-V:-0.457452\n",
      "[78]\tvalidation_0-V:-0.457968\n",
      "[79]\tvalidation_0-V:-0.457968\n",
      "[80]\tvalidation_0-V:-0.457968\n",
      "[81]\tvalidation_0-V:-0.457452\n",
      "[82]\tvalidation_0-V:-0.457968\n",
      "[83]\tvalidation_0-V:-0.457968\n",
      "[84]\tvalidation_0-V:-0.457968\n",
      "[85]\tvalidation_0-V:-0.456421\n",
      "[86]\tvalidation_0-V:-0.457968\n",
      "[87]\tvalidation_0-V:-0.458484\n",
      "[88]\tvalidation_0-V:-0.458999\n",
      "[89]\tvalidation_0-V:-0.458999\n",
      "[90]\tvalidation_0-V:-0.457968\n",
      "[91]\tvalidation_0-V:-0.458484\n",
      "[92]\tvalidation_0-V:-0.457968\n",
      "[93]\tvalidation_0-V:-0.457968\n",
      "[94]\tvalidation_0-V:-0.456937\n",
      "[95]\tvalidation_0-V:-0.456937\n",
      "[96]\tvalidation_0-V:-0.456937\n",
      "[97]\tvalidation_0-V:-0.457968\n",
      "[98]\tvalidation_0-V:-0.458999\n",
      "[99]\tvalidation_0-V:-0.458999\n",
      "[100]\tvalidation_0-V:-0.459515\n",
      "[101]\tvalidation_0-V:-0.458484\n",
      "[102]\tvalidation_0-V:-0.458484\n",
      "[103]\tvalidation_0-V:-0.459515\n",
      "[104]\tvalidation_0-V:-0.460547\n",
      "[105]\tvalidation_0-V:-0.459515\n",
      "[106]\tvalidation_0-V:-0.458484\n",
      "[107]\tvalidation_0-V:-0.458484\n",
      "[108]\tvalidation_0-V:-0.458999\n",
      "[109]\tvalidation_0-V:-0.458484\n",
      "[110]\tvalidation_0-V:-0.457968\n",
      "[111]\tvalidation_0-V:-0.458484\n",
      "[112]\tvalidation_0-V:-0.458484\n",
      "[113]\tvalidation_0-V:-0.458484\n",
      "[114]\tvalidation_0-V:-0.459515\n",
      "[115]\tvalidation_0-V:-0.459515\n",
      "[116]\tvalidation_0-V:-0.459515\n",
      "[117]\tvalidation_0-V:-0.460031\n",
      "[118]\tvalidation_0-V:-0.460031\n",
      "[119]\tvalidation_0-V:-0.460547\n",
      "[120]\tvalidation_0-V:-0.461062\n",
      "[121]\tvalidation_0-V:-0.461062\n",
      "[122]\tvalidation_0-V:-0.461062\n",
      "[123]\tvalidation_0-V:-0.46261\n",
      "[124]\tvalidation_0-V:-0.462094\n",
      "[125]\tvalidation_0-V:-0.461578\n",
      "[126]\tvalidation_0-V:-0.460031\n",
      "[127]\tvalidation_0-V:-0.460547\n",
      "[128]\tvalidation_0-V:-0.461062\n",
      "[129]\tvalidation_0-V:-0.461062\n",
      "[130]\tvalidation_0-V:-0.461062\n",
      "[131]\tvalidation_0-V:-0.461062\n",
      "[132]\tvalidation_0-V:-0.460031\n",
      "[133]\tvalidation_0-V:-0.460031\n",
      "[134]\tvalidation_0-V:-0.460031\n",
      "[135]\tvalidation_0-V:-0.460031\n",
      "[136]\tvalidation_0-V:-0.460031\n",
      "[137]\tvalidation_0-V:-0.460547\n",
      "[138]\tvalidation_0-V:-0.460547\n",
      "[139]\tvalidation_0-V:-0.460547\n",
      "[140]\tvalidation_0-V:-0.462094\n",
      "[141]\tvalidation_0-V:-0.46261\n",
      "[142]\tvalidation_0-V:-0.461578\n",
      "[143]\tvalidation_0-V:-0.46261\n",
      "[144]\tvalidation_0-V:-0.462094\n",
      "[145]\tvalidation_0-V:-0.463125\n",
      "[146]\tvalidation_0-V:-0.463641\n",
      "[147]\tvalidation_0-V:-0.464157\n",
      "[148]\tvalidation_0-V:-0.464157\n",
      "[149]\tvalidation_0-V:-0.464157\n",
      "[150]\tvalidation_0-V:-0.464157\n",
      "[151]\tvalidation_0-V:-0.465188\n",
      "[152]\tvalidation_0-V:-0.463641\n",
      "[153]\tvalidation_0-V:-0.463641\n",
      "[154]\tvalidation_0-V:-0.463641\n",
      "[155]\tvalidation_0-V:-0.463641\n",
      "[156]\tvalidation_0-V:-0.463641\n",
      "[157]\tvalidation_0-V:-0.46261\n",
      "[158]\tvalidation_0-V:-0.46261\n",
      "[159]\tvalidation_0-V:-0.463125\n",
      "[160]\tvalidation_0-V:-0.46261\n",
      "[161]\tvalidation_0-V:-0.464157\n",
      "[162]\tvalidation_0-V:-0.464157\n",
      "[163]\tvalidation_0-V:-0.465188\n",
      "[164]\tvalidation_0-V:-0.465188\n",
      "[165]\tvalidation_0-V:-0.465704\n",
      "[166]\tvalidation_0-V:-0.46622\n",
      "[167]\tvalidation_0-V:-0.463125\n",
      "[168]\tvalidation_0-V:-0.464673\n",
      "[169]\tvalidation_0-V:-0.465704\n",
      "[170]\tvalidation_0-V:-0.465188\n",
      "[171]\tvalidation_0-V:-0.464157\n",
      "[172]\tvalidation_0-V:-0.465704\n",
      "[173]\tvalidation_0-V:-0.465704\n",
      "[174]\tvalidation_0-V:-0.465188\n",
      "[175]\tvalidation_0-V:-0.464673\n",
      "[176]\tvalidation_0-V:-0.464157\n",
      "[177]\tvalidation_0-V:-0.464157\n",
      "[178]\tvalidation_0-V:-0.464157\n",
      "[179]\tvalidation_0-V:-0.464673\n",
      "[180]\tvalidation_0-V:-0.464157\n",
      "[181]\tvalidation_0-V:-0.464157\n",
      "[182]\tvalidation_0-V:-0.464157\n",
      "[183]\tvalidation_0-V:-0.464157\n",
      "[184]\tvalidation_0-V:-0.464157\n",
      "[185]\tvalidation_0-V:-0.463641\n",
      "[186]\tvalidation_0-V:-0.464157\n",
      "[187]\tvalidation_0-V:-0.464157\n",
      "[188]\tvalidation_0-V:-0.464157\n",
      "[189]\tvalidation_0-V:-0.465704\n",
      "[190]\tvalidation_0-V:-0.464673\n",
      "[191]\tvalidation_0-V:-0.464673\n",
      "[192]\tvalidation_0-V:-0.464157\n",
      "[193]\tvalidation_0-V:-0.464157\n",
      "[194]\tvalidation_0-V:-0.464157\n",
      "[195]\tvalidation_0-V:-0.463125\n",
      "[196]\tvalidation_0-V:-0.46261\n",
      "[197]\tvalidation_0-V:-0.463125\n",
      "[198]\tvalidation_0-V:-0.462094\n",
      "[199]\tvalidation_0-V:-0.461578\n",
      "[200]\tvalidation_0-V:-0.462094\n",
      "[201]\tvalidation_0-V:-0.462094\n",
      "[202]\tvalidation_0-V:-0.462094\n",
      "[203]\tvalidation_0-V:-0.461578\n",
      "[204]\tvalidation_0-V:-0.461062\n",
      "[205]\tvalidation_0-V:-0.463641\n",
      "[206]\tvalidation_0-V:-0.463641\n",
      "[207]\tvalidation_0-V:-0.463641\n",
      "[208]\tvalidation_0-V:-0.464157\n",
      "[209]\tvalidation_0-V:-0.463641\n",
      "[210]\tvalidation_0-V:-0.464673\n",
      "[211]\tvalidation_0-V:-0.464673\n",
      "[212]\tvalidation_0-V:-0.465704\n",
      "[213]\tvalidation_0-V:-0.465704\n",
      "[214]\tvalidation_0-V:-0.466735\n",
      "[215]\tvalidation_0-V:-0.466735\n",
      "[216]\tvalidation_0-V:-0.46622\n",
      "[217]\tvalidation_0-V:-0.46622\n",
      "[218]\tvalidation_0-V:-0.46622\n",
      "[219]\tvalidation_0-V:-0.465704\n",
      "[220]\tvalidation_0-V:-0.465704\n",
      "[221]\tvalidation_0-V:-0.465704\n",
      "[222]\tvalidation_0-V:-0.464673\n",
      "[223]\tvalidation_0-V:-0.464673\n",
      "[224]\tvalidation_0-V:-0.464673\n",
      "[225]\tvalidation_0-V:-0.465188\n",
      "[226]\tvalidation_0-V:-0.464673\n",
      "[227]\tvalidation_0-V:-0.464673\n",
      "[228]\tvalidation_0-V:-0.464673\n",
      "[229]\tvalidation_0-V:-0.465188\n",
      "[230]\tvalidation_0-V:-0.464673\n",
      "[231]\tvalidation_0-V:-0.46622\n",
      "[232]\tvalidation_0-V:-0.466735\n",
      "[233]\tvalidation_0-V:-0.466735\n",
      "[234]\tvalidation_0-V:-0.467767\n",
      "[235]\tvalidation_0-V:-0.468283\n",
      "[236]\tvalidation_0-V:-0.468283\n",
      "[237]\tvalidation_0-V:-0.468283\n",
      "[238]\tvalidation_0-V:-0.468283\n",
      "[239]\tvalidation_0-V:-0.467767\n",
      "[240]\tvalidation_0-V:-0.468798\n",
      "[241]\tvalidation_0-V:-0.469314\n",
      "[242]\tvalidation_0-V:-0.468798\n",
      "[243]\tvalidation_0-V:-0.468283\n",
      "[244]\tvalidation_0-V:-0.468283\n",
      "[245]\tvalidation_0-V:-0.467767\n",
      "[246]\tvalidation_0-V:-0.467767\n",
      "[247]\tvalidation_0-V:-0.467767\n",
      "[248]\tvalidation_0-V:-0.468283\n",
      "[249]\tvalidation_0-V:-0.467767\n",
      "[250]\tvalidation_0-V:-0.467767\n",
      "[251]\tvalidation_0-V:-0.467767\n",
      "[252]\tvalidation_0-V:-0.466735\n",
      "[253]\tvalidation_0-V:-0.466735\n",
      "[254]\tvalidation_0-V:-0.46622\n",
      "[255]\tvalidation_0-V:-0.466735\n",
      "[256]\tvalidation_0-V:-0.466735\n",
      "[257]\tvalidation_0-V:-0.467251\n",
      "[258]\tvalidation_0-V:-0.466735\n",
      "[259]\tvalidation_0-V:-0.46622\n",
      "[260]\tvalidation_0-V:-0.46622\n",
      "[261]\tvalidation_0-V:-0.46622\n",
      "[262]\tvalidation_0-V:-0.46622\n",
      "[263]\tvalidation_0-V:-0.46622\n",
      "[264]\tvalidation_0-V:-0.46622\n",
      "[265]\tvalidation_0-V:-0.46622\n",
      "[266]\tvalidation_0-V:-0.46622\n",
      "[267]\tvalidation_0-V:-0.466735\n",
      "[268]\tvalidation_0-V:-0.466735\n",
      "[269]\tvalidation_0-V:-0.467767\n",
      "[270]\tvalidation_0-V:-0.468283\n",
      "[271]\tvalidation_0-V:-0.468283\n",
      "[272]\tvalidation_0-V:-0.468798\n",
      "[273]\tvalidation_0-V:-0.469314\n",
      "[274]\tvalidation_0-V:-0.469314\n",
      "[275]\tvalidation_0-V:-0.46983\n",
      "[276]\tvalidation_0-V:-0.46983\n",
      "[277]\tvalidation_0-V:-0.469314\n",
      "[278]\tvalidation_0-V:-0.468798\n",
      "[279]\tvalidation_0-V:-0.468283\n",
      "[280]\tvalidation_0-V:-0.468283\n",
      "[281]\tvalidation_0-V:-0.466735\n",
      "[282]\tvalidation_0-V:-0.465704\n",
      "[283]\tvalidation_0-V:-0.465704\n",
      "[284]\tvalidation_0-V:-0.465704\n",
      "[285]\tvalidation_0-V:-0.465704\n",
      "[286]\tvalidation_0-V:-0.467767\n",
      "[287]\tvalidation_0-V:-0.467251\n",
      "[288]\tvalidation_0-V:-0.467251\n",
      "[289]\tvalidation_0-V:-0.466735\n",
      "[290]\tvalidation_0-V:-0.46622\n",
      "[291]\tvalidation_0-V:-0.46622\n",
      "[292]\tvalidation_0-V:-0.46622\n",
      "[293]\tvalidation_0-V:-0.466735\n",
      "[294]\tvalidation_0-V:-0.466735\n",
      "[295]\tvalidation_0-V:-0.466735\n",
      "[296]\tvalidation_0-V:-0.466735\n",
      "[297]\tvalidation_0-V:-0.467251\n",
      "[298]\tvalidation_0-V:-0.467251\n",
      "[299]\tvalidation_0-V:-0.467251\n",
      "[300]\tvalidation_0-V:-0.467251\n",
      "[301]\tvalidation_0-V:-0.467251\n",
      "[302]\tvalidation_0-V:-0.467251\n",
      "[303]\tvalidation_0-V:-0.467251\n",
      "[304]\tvalidation_0-V:-0.467767\n",
      "[305]\tvalidation_0-V:-0.467251\n",
      "[306]\tvalidation_0-V:-0.467767\n",
      "[307]\tvalidation_0-V:-0.467251\n",
      "[308]\tvalidation_0-V:-0.467251\n",
      "[309]\tvalidation_0-V:-0.466735\n",
      "[310]\tvalidation_0-V:-0.46622\n",
      "[311]\tvalidation_0-V:-0.46622\n",
      "[312]\tvalidation_0-V:-0.467767\n",
      "[313]\tvalidation_0-V:-0.468283\n",
      "[314]\tvalidation_0-V:-0.468798\n",
      "[315]\tvalidation_0-V:-0.468798\n",
      "[316]\tvalidation_0-V:-0.468283\n",
      "[317]\tvalidation_0-V:-0.468798\n",
      "[318]\tvalidation_0-V:-0.469314\n",
      "[319]\tvalidation_0-V:-0.46983\n",
      "[320]\tvalidation_0-V:-0.46983\n",
      "[321]\tvalidation_0-V:-0.46983\n",
      "[322]\tvalidation_0-V:-0.46983\n",
      "[323]\tvalidation_0-V:-0.469314\n",
      "[324]\tvalidation_0-V:-0.468798\n",
      "[325]\tvalidation_0-V:-0.468798\n",
      "Stopping. Best iteration:\n",
      "[275]\tvalidation_0-V:-0.46983\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Train an XGBoost model with the pairwise ranking objective. The same round\n",
    "# budget is passed both inside model_para and to train_xgboost_step, so it is\n",
    "# named once here.\n",
    "N_ESTIMATORS = 1000\n",
    "EARLY_STOPPING_ROUNDS = 50  # the log reports: stop when validation_0-V has not improved in 50 rounds\n",
    "\n",
    "model_para = dict(\n",
    "    objective='rank:pairwise',\n",
    "    n_estimators=N_ESTIMATORS,\n",
    "    # scale_pos_weight=5,  # left disabled, as in the original cell\n",
    "    learning_rate=0.3,\n",
    ")\n",
    "r = train.train_xgboost_step(\n",
    "    X_df,\n",
    "    y_df,\n",
    "    early_stopping_rounds=EARLY_STOPPING_ROUNDS,\n",
    "    n_estimators=N_ESTIMATORS,\n",
    "    model_para=model_para,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Summarise every loaded feature block by name and shape. A bare `__x` would\n",
    "# print the repr of each frame in the list back to back, which hides the one\n",
    "# thing worth checking here: that all blocks line up before pd.concat.\n",
    "{name: frame.shape for name, frame in zip(feature_list, __x)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Settings for the feature-importance run. Every value is wrapped in a\n",
    "# one-element list (presumably a search grid; confirm against\n",
    "# train.feature_importance_xgboost).\n",
    "model_para_ref = dict(\n",
    "    objective=['rank:pairwise'],\n",
    "    n_estimators=[371],  # NOTE(review): hard-coded round count; confirm which run it came from\n",
    "    learning_rate=[0.3],\n",
    ")\n",
    "\n",
    "result = train.feature_importance_xgboost(\n",
    "    X_df,\n",
    "    y_df,\n",
    "    3,\n",
    "    model_para_ref=model_para_ref,\n",
    "    use_seed=True,\n",
    "    verbose=True,\n",
    ")\n",
    "\n",
    "# NOTE(review): print_full is not defined in the visible cells; confirm it\n",
    "# exists on a fresh kernel.\n",
    "print_full(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Index labels of the rows of `result` whose importance is exactly zero.\n",
    "result.loc[result['importance'].eq(0)].index.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Display `result`, the value assigned from train.feature_importance_xgboost.\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
